// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Guillaume Saupin <guillaume.saupin@cea.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SKYLINEINPLACELU_H
#define EIGEN_SKYLINEINPLACELU_H

// IWYU pragma: private
#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \ingroup Skyline_Module
 *
 * \class SkylineInplaceLU
 *
 * \brief Inplace LU decomposition of a skyline matrix and associated features
 *
 * \param MatrixType the type of the matrix of which we are computing the LU factorization
 *
 */
template <typename MatrixType>
class SkylineInplaceLU {
 protected:
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::Index Index;

  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;

 public:
  /** Creates a LU object and compute the respective factorization of \a matrix using
   * flags \a flags. */
  SkylineInplaceLU(MatrixType& matrix, int flags = 0)
      : /*m_matrix(matrix.rows(), matrix.cols()),*/ m_flags(flags), m_status(0), m_lu(matrix) {
    m_precision = RealScalar(0.1) * Eigen::dummy_precision<RealScalar>();
    m_lu.IsRowMajor ? computeRowMajor() : compute();
  }

  /** Sets the relative threshold value used to prune zero coefficients during the decomposition.
   *
   * Setting a value greater than zero speeds up computation, and yields to an incomplete
   * factorization with fewer non zero coefficients. Such approximate factors are especially
   * useful to initialize an iterative solver.
   *
   * Note that the exact meaning of this parameter might depends on the actual
   * backend. Moreover, not all backends support this feature.
   *
   * \sa precision() */
  void setPrecision(RealScalar v) { m_precision = v; }

  /** \returns the current precision.
   *
   * \sa setPrecision() */
  RealScalar precision() const { return m_precision; }

  /** Sets the flags. Possible values are:
   *  - CompleteFactorization
   *  - IncompleteFactorization
   *  - MemoryEfficient
   *  - one of the ordering methods
   *  - etc...
   *
   * \sa flags() */
  void setFlags(int f) { m_flags = f; }

  /** \returns the current flags */
  int flags() const { return m_flags; }

  void setOrderingMethod(int m) { m_flags = m; }

  int orderingMethod() const { return m_flags; }

  /** Computes/re-computes the LU factorization */
  void compute();
  void computeRowMajor();

  /** \returns the lower triangular matrix L */
  // inline const MatrixType& matrixL() const { return m_matrixL; }

  /** \returns the upper triangular matrix U */
  // inline const MatrixType& matrixU() const { return m_matrixU; }

  template <typename BDerived, typename XDerived>
  bool solve(const MatrixBase<BDerived>& b, MatrixBase<XDerived>* x, const int transposed = 0) const;

  /** \returns true if the factorization succeeded */
  inline bool succeeded(void) const { return m_succeeded; }

 protected:
  RealScalar m_precision;
  int m_flags;
  mutable int m_status;
  bool m_succeeded;
  MatrixType& m_lu;
};

/** Computes / recomputes the in place LU decomposition of the SkylineInplaceLU.
 * using the default algorithm.
 */
template <typename MatrixType>
// template<typename Scalar_>
void SkylineInplaceLU<MatrixType>::compute() {
  const size_t rows = m_lu.rows();
  const size_t cols = m_lu.cols();

  eigen_assert(rows == cols && "We do not (yet) support rectangular LU.");
  eigen_assert(!m_lu.IsRowMajor && "LU decomposition does not work with rowMajor Storage");

  for (Index row = 0; row < rows; row++) {
    const double pivot = m_lu.coeffDiag(row);

    // Lower matrix Columns update
    const Index& col = row;
    for (typename MatrixType::InnerLowerIterator lIt(m_lu, col); lIt; ++lIt) {
      lIt.valueRef() /= pivot;
    }

    // Upper matrix update -> contiguous memory access
    typename MatrixType::InnerLowerIterator lIt(m_lu, col);
    for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) {
      typename MatrixType::InnerUpperIterator uItPivot(m_lu, row);
      typename MatrixType::InnerUpperIterator uIt(m_lu, rrow);
      const double coef = lIt.value();

      uItPivot += (rrow - row - 1);

      // update upper part  -> contiguous memory access
      for (++uItPivot; uIt && uItPivot;) {
        uIt.valueRef() -= uItPivot.value() * coef;

        ++uIt;
        ++uItPivot;
      }
      ++lIt;
    }

    // Upper matrix update -> non contiguous memory access
    typename MatrixType::InnerLowerIterator lIt3(m_lu, col);
    for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) {
      typename MatrixType::InnerUpperIterator uItPivot(m_lu, row);
      const double coef = lIt3.value();

      // update lower part ->  non contiguous memory access
      for (Index i = 0; i < rrow - row - 1; i++) {
        m_lu.coeffRefLower(rrow, row + i + 1) -= uItPivot.value() * coef;
        ++uItPivot;
      }
      ++lIt3;
    }
    // update diag -> contiguous
    typename MatrixType::InnerLowerIterator lIt2(m_lu, col);
    for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) {
      typename MatrixType::InnerUpperIterator uItPivot(m_lu, row);
      typename MatrixType::InnerUpperIterator uIt(m_lu, rrow);
      const double coef = lIt2.value();

      uItPivot += (rrow - row - 1);
      m_lu.coeffRefDiag(rrow) -= uItPivot.value() * coef;
      ++lIt2;
    }
  }
}

template <typename MatrixType>
void SkylineInplaceLU<MatrixType>::computeRowMajor() {
  const size_t rows = m_lu.rows();
  const size_t cols = m_lu.cols();

  eigen_assert(rows == cols && "We do not (yet) support rectangular LU.");
  eigen_assert(m_lu.IsRowMajor && "You're trying to apply rowMajor decomposition on a ColMajor matrix !");

  for (Index row = 0; row < rows; row++) {
    typename MatrixType::InnerLowerIterator llIt(m_lu, row);

    for (Index col = llIt.col(); col < row; col++) {
      if (m_lu.coeffExistLower(row, col)) {
        const double diag = m_lu.coeffDiag(col);

        typename MatrixType::InnerLowerIterator lIt(m_lu, row);
        typename MatrixType::InnerUpperIterator uIt(m_lu, col);

        const Index offset = lIt.col() - uIt.row();

        Index stop = offset > 0 ? col - lIt.col() : col - uIt.row();

        // #define VECTORIZE
#ifdef VECTORIZE
        Map<VectorXd> rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop);
        Map<VectorXd> colVal(uIt.valuePtr() + (offset > 0 ? offset : 0), stop);

        Scalar newCoeff = m_lu.coeffLower(row, col) - rowVal.dot(colVal);
#else
        if (offset > 0)  // Skip zero value of lIt
          uIt += offset;
        else  // Skip zero values of uIt
          lIt += -offset;
        Scalar newCoeff = m_lu.coeffLower(row, col);

        for (Index k = 0; k < stop; ++k) {
          const Scalar tmp = newCoeff;
          newCoeff = tmp - lIt.value() * uIt.value();
          ++lIt;
          ++uIt;
        }
#endif

        m_lu.coeffRefLower(row, col) = newCoeff / diag;
      }
    }

    // Upper matrix update
    const Index col = row;
    typename MatrixType::InnerUpperIterator uuIt(m_lu, col);
    for (Index rrow = uuIt.row(); rrow < col; rrow++) {
      typename MatrixType::InnerLowerIterator lIt(m_lu, rrow);
      typename MatrixType::InnerUpperIterator uIt(m_lu, col);
      const Index offset = lIt.col() - uIt.row();

      Index stop = offset > 0 ? rrow - lIt.col() : rrow - uIt.row();

#ifdef VECTORIZE
      Map<VectorXd> rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop);
      Map<VectorXd> colVal(uIt.valuePtr() + (offset > 0 ? offset : 0), stop);

      Scalar newCoeff = m_lu.coeffUpper(rrow, col) - rowVal.dot(colVal);
#else
      if (offset > 0)  // Skip zero value of lIt
        uIt += offset;
      else  // Skip zero values of uIt
        lIt += -offset;
      Scalar newCoeff = m_lu.coeffUpper(rrow, col);
      for (Index k = 0; k < stop; ++k) {
        const Scalar tmp = newCoeff;
        newCoeff = tmp - lIt.value() * uIt.value();

        ++lIt;
        ++uIt;
      }
#endif
      m_lu.coeffRefUpper(rrow, col) = newCoeff;
    }

    // Diag matrix update
    typename MatrixType::InnerLowerIterator lIt(m_lu, row);
    typename MatrixType::InnerUpperIterator uIt(m_lu, row);

    const Index offset = lIt.col() - uIt.row();

    Index stop = offset > 0 ? lIt.size() : uIt.size();
#ifdef VECTORIZE
    Map<VectorXd> rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop);
    Map<VectorXd> colVal(uIt.valuePtr() + (offset > 0 ? offset : 0), stop);
    Scalar newCoeff = m_lu.coeffDiag(row) - rowVal.dot(colVal);
#else
    if (offset > 0)  // Skip zero value of lIt
      uIt += offset;
    else  // Skip zero values of uIt
      lIt += -offset;
    Scalar newCoeff = m_lu.coeffDiag(row);
    for (Index k = 0; k < stop; ++k) {
      const Scalar tmp = newCoeff;
      newCoeff = tmp - lIt.value() * uIt.value();
      ++lIt;
      ++uIt;
    }
#endif
    m_lu.coeffRefDiag(row) = newCoeff;
  }
}

/** Computes *x = U^-1 L^-1 b
 *
 * If \a transpose is set to SvTranspose or SvAdjoint, the solution
 * of the transposed/adjoint system is computed instead.
 *
 * Not all backends implement the solution of the transposed or
 * adjoint system.
 */
template <typename MatrixType>
template <typename BDerived, typename XDerived>
bool SkylineInplaceLU<MatrixType>::solve(const MatrixBase<BDerived>& b, MatrixBase<XDerived>* x,
                                         const int transposed) const {
  const size_t rows = m_lu.rows();
  const size_t cols = m_lu.cols();

  for (Index row = 0; row < rows; row++) {
    x->coeffRef(row) = b.coeff(row);
    Scalar newVal = x->coeff(row);
    typename MatrixType::InnerLowerIterator lIt(m_lu, row);

    Index col = lIt.col();
    while (lIt.col() < row) {
      newVal -= x->coeff(col++) * lIt.value();
      ++lIt;
    }

    x->coeffRef(row) = newVal;
  }

  for (Index col = rows - 1; col > 0; col--) {
    x->coeffRef(col) = x->coeff(col) / m_lu.coeffDiag(col);

    const Scalar x_col = x->coeff(col);

    typename MatrixType::InnerUpperIterator uIt(m_lu, col);
    uIt += uIt.size() - 1;

    while (uIt) {
      x->coeffRef(uIt.row()) -= x_col * uIt.value();
      // TODO : introduce --operator
      uIt += -1;
    }
  }
  x->coeffRef(0) = x->coeff(0) / m_lu.coeffDiag(0);

  return true;
}

}  // end namespace Eigen

#endif  // EIGEN_SKYLINEINPLACELU_H
